In [1]:
%reload_ext autotime
import pandas as pd
import requests
from pprint import pprint
import json
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from tqdm.auto import tqdm
pd.options.plotting.backend = "plotly"
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
✔️ 4.72 s (2024-12-12T09:23:59/2024-12-12T09:24:03)
2024-12-12 09:24:02.300884: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-12-12 09:24:02.313220: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-12-12 09:24:02.329137: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-12-12 09:24:02.334326: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2024-12-12 09:24:02.348072: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-12-12 09:24:03.164700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [7]:
# Load the survey results and keep one row per unique Street View panorama
# (the same panoid can appear for multiple survey responses).
results_raw = pd.read_csv("results.csv")
df = results_raw.drop_duplicates(subset="panoid")
df
✔️ 20.4 ms (2024-12-12T09:45:14/2024-12-12T09:45:14)
Out[7]:
| Index | pid | n | time | anxiousness | latitude | longitude | geometry | panoid | panolat | panolon | panodate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13 | P20006 | 1 | 2023-06-03T02:45:55Z | 3 | -36.892203 | 174.740125 | POINT (174.7401253 -36.89220256) | AF1QipPv0qhRSftIHefRk_j2bH78qeRkOMi_8renBk2B | -36.891827 | 174.740463 | 2015-01-03 |
| 1 | 15 | P20009 | 2 | 2023-05-17T04:54:48Z | 3 | -36.923191 | 174.748620 | POINT (174.7486203 -36.92319093) | AF1QipNBp6ZTo1nm7EvX5wssSbZlHV7VaBaHqPsp3zre | -36.923299 | 174.744895 | 2016-03-30 |
| 9 | 24 | P20021 | 1 | 2023-06-03T03:55:41Z | 1 | -38.140714 | 176.251862 | POINT (176.2518616 -38.14071376) | AF1QipNiSoDDA2omwMtXrIq76eZTz6u7JXaIMz2lY2HN | -38.140783 | 176.251412 | 2017-04-01 |
| 10 | 26 | P20021 | 3 | 2023-06-05T21:49:46Z | 3 | -36.894889 | 174.742775 | POINT (174.7427751 -36.89488899) | AF1QipP403MtkkXU3Xsf07nh0U0n0oGiZni4MZoO2mnD | -36.896569 | 174.743840 | 2022-12-05 |
| 12 | 28 | P20022 | 1 | 2023-04-25T02:58:09Z | 3 | -36.913364 | 174.729617 | POINT (174.7296168 -36.91336394) | AF1QipPcsqbdyRALZRYd_9SvX3O-yqcWyFO9dVUBXvNz | -36.913070 | 174.727857 | 2016-07-29 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 456 | 575 | P20304 | 2 | 2024-01-24T21:01:50Z | 0 | -36.922586 | 174.719866 | POINT (174.7198659 -36.92258555) | AF1QipPKcFXiyarq--Lz0yNktLmcEaL-H3eGCF6DWmUi | -36.923321 | 174.719057 | 2022-02-11 |
| 457 | 584 | P20314 | 2 | 2023-09-29T07:25:56Z | 0 | -36.899923 | 174.815464 | POINT (174.8154642 -36.89992279) | AF1QipPeF_kdk6m_6Xml3oxzrkyIga1lBDodkWZ1Re78 | -36.900272 | 174.815460 | 2015-06-08 |
| 458 | 597 | P20318 | 1 | 2023-09-29T05:14:26Z | 1 | -36.893269 | 174.744269 | POINT (174.7442694 -36.89326884) | AF1QipM9tZLFQoQbz4xJuy8LPLwCOf5xIyJhU8DxRdIv | -36.892409 | 174.745253 | 2018-06-11 |
| 459 | 598 | P20318 | 2 | 2023-09-29T23:47:24Z | 1 | -36.854193 | 174.729131 | POINT (174.7291308 -36.85419324) | AF1QipNqZ74PoOatlFqQRJGfJikvb6x_topGXM9XdRy3 | -36.855219 | 174.731047 | 2016-11-20 |
| 460 | 599 | P20318 | 3 | 2023-09-30T05:29:26Z | 1 | -36.893262 | 174.744341 | POINT (174.744341 -36.89326166) | AF1QipODQrKPpYlQauG8zcZSenz1FU3Tsg_ApD0UGvx4 | -36.892527 | 174.745399 | 2018-06-11 |
123 rows × 12 columns
In [3]:
# Loading this model needs about 22.69GB of GPU memory
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# bfloat16 halves the weight memory vs fp32; device_map="auto" lets
# accelerate place the shards on the available device(s).
load_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = MllamaForConditionalGeneration.from_pretrained(model_id, **load_kwargs)
processor = AutoProcessor.from_pretrained(model_id)
✔️ 13.6 s (2024-12-12T09:24:04/2024-12-12T09:24:17)
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s]
In [10]:
# Spot-check: run the extraction prompt on 10 random panoramas, showing each
# image next to the parsed JSON, before committing to the full run.
for row in tqdm(df.sample(10).itertuples(index=False)):
    panoid = row.panoid
    # Image.open is lazy and keeps the file handle open; the context manager
    # guarantees it is closed once the pixel data has been consumed.
    with Image.open(f"panoramas/{panoid}.jpg") as image:
        display(image.resize((image.width // 2, image.height // 2)))
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": """
This image is a panorama from Google Street View.
From the image, extract the following information, in JSON format:
green: Percentage of the image that is green space (e.g. parks, gardens, trees, grass etc.). A number from 0-100.
environment: Classify the nature of the environment in this image. Built up/green/residential/shops/cafes?. A string.
water: If you see any streams/ponds/rivers/ocean in the image, estimate the distance to the water in meters. A number. If there is no water, return 0.
obscured: Proportion of view obscured by buildings (how much of total line of sight is blocked by buildings in close proximity). A number from 0-100.
people: the number of people you see in the image
cars: the number of cars you see in the image
bikes: the number of bikes you see in the image
Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""},
                    {"type": "image"},
                ]
            }
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(model.device)
    # Generation is non-deterministic enough that a retry usually fixes
    # malformed JSON; give the model three attempts.
    for retry in range(3):
        output = model.generate(**inputs, max_new_tokens=5000)
        result = processor.decode(output[0])
        # The answer follows the final "<|end_header_id|>" token; strip the
        # end-of-turn marker before parsing.
        result = result[result.rindex("<|end_header_id|>") + len("<|end_header_id|>"):].strip().replace("<|eot_id|>", "")
        print("Output:")
        try:
            result = json.loads(result)
            pprint(result)
            print("\n")
            break
        except json.JSONDecodeError:
            print(f"Unable to parse: {result}")
    else:
        # All three attempts produced invalid JSON — make the failure visible
        # instead of silently moving to the next panorama.
        print(f"Giving up on {panoid}: no valid JSON after 3 attempts")
✔️ 26.5 s (2024-12-12T09:51:22/2024-12-12T09:51:49)
0it [00:00, ?it/s]
Output:
{'bikes': 0,
'cars': 0,
'environment': 'built up',
'green': 0,
'obscured': 100,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'shops',
'green': 0,
'obscured': 100,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'built up',
'green': 0,
'obscured': 100,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'built up',
'green': 0,
'obscured': 50,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'Built up',
'green': 0,
'obscured': 100,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'shops',
'green': 0,
'obscured': 0,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'park',
'green': 65,
'obscured': 20,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'residential',
'green': 50,
'obscured': 50,
'people': 0,
'water': 0}
Output:
{'bikes': 0,
'cars': 4,
'environment': 'built up',
'green': 0,
'obscured': 50,
'people': 17,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'residential',
'green': 50,
'obscured': 25,
'people': 0,
'water': 0}
In [ ]:
# Full run: extract the JSON metrics for every panorama and save to CSV.
results = []
for row in tqdm(df.itertuples(index=False), total=len(df)):
    panoid = row.panoid
    # NOTE: the sample cell above reads these same panoids as .jpg, so .png
    # here would raise FileNotFoundError; use the extension known to exist.
    # The context manager closes the lazily-held file handle.
    with Image.open(f"panoramas/{panoid}.jpg") as image:
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": """
This image is a panorama from Google Street View.
From the image, extract the following information, in JSON format:
green: Percentage of the image that is green space (e.g. parks, gardens, trees, grass etc.). A number from 0-100.
environment: Classify the nature of the environment in this image. Built up/green/residential/shops/cafes?. A string.
water: If you see any streams/ponds/rivers/ocean in the image, estimate the distance to the water in meters. A number. If there is no water, return 0.
obscured: Proportion of view obscured by buildings (how much of total line of sight is blocked by buildings in close proximity). A number from 0-100.
people: the number of people you see in the image
cars: the number of cars you see in the image
bikes: the number of bikes you see in the image
Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""},
                    {"type": "image"},
                ]
            }
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(model.device)
    # Up to three generation attempts per panorama; malformed JSON is retried.
    for retry in range(3):
        output = model.generate(**inputs, max_new_tokens=5000)
        result = processor.decode(output[0])
        # The answer follows the final "<|end_header_id|>" token; strip the
        # end-of-turn marker before parsing.
        result = result[result.rindex("<|end_header_id|>") + len("<|end_header_id|>"):].strip().replace("<|eot_id|>", "")
        try:
            parsed = json.loads(result)
        except json.JSONDecodeError:
            print(f"Unable to parse: {result}")
            continue
        # Keep the namedtuple `row` intact; build the output record separately.
        record = row._asdict()
        record.update(parsed)
        results.append(record)
        break
    else:
        # All retries produced invalid JSON; this panoid will be missing from
        # the output CSV — report it loudly rather than dropping it silently.
        print(f"Skipping {panoid}: no valid JSON after 3 attempts")
results = pd.DataFrame(results)
results.to_csv("LLM_results.csv", index=False)
results